From 8239d41d04081c1130a1eb4d937dcee46452d844 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 7 Oct 2024 11:39:33 +0200
Subject: [PATCH 01/17] [Local App Snippet] support non-conversational LLMs

---
 packages/tasks/src/local-apps.ts | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index edc7e64fd8..245bcf729e 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -92,15 +92,19 @@ function isMlxModel(model: ModelData) {
 }
 
 const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
-	const command = (binary: string) =>
-		[
+	const command = (binary: string) => {
+		let snippet = [
 			"# Load and run the model:",
 			`${binary} \\`,
 			`	--hf-repo "${model.id}" \\`,
 			`	--hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`,
-			'	-p "You are a helpful assistant" \\',
-			"	--conversation",
+			`	-p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time "}"`,
 		].join("\n");
+		if (model.tags.includes("conversational")) {
+			snippet += " \\\n	--conversation";
+		}
+		return snippet;
+	};
 	return [
 		{
 			title: "Install from brew",
@@ -178,7 +182,7 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[]
 };
 
 const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
-	const runCommand = [
+	const runCommandInstruct = [
 		"# Call the server using curl:",
 		`curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
 		`	-H "Content-Type: application/json" \\`,
@@ -189,6 +193,18 @@ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
 		`		]`,
 		`	}'`,
 	];
+	const runCommandNonInstruct = [
+		"# Call the server using curl:",
+		`curl -X POST "http://localhost:8000/v1/completions" \\`,
+		`	-H "Content-Type: application/json" \\`,
+		`	--data '{`,
+		`		"model": "${model.id}",`,
+		`		"prompt": "Once upon a time ",`,
+		`		"max_tokens": 512,`,
+		`		"temperature": 0.5`,
+		`	}'`,
+	];
+	const runCommand = model.tags.includes("conversational") ? runCommandInstruct : runCommandNonInstruct;
 	return [
 		{
 			title: "Install from pip",

From 36761d56c77e024b081e797a23cdd4705dca1c17 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 7 Oct 2024 12:07:20 +0200
Subject: [PATCH 02/17] llama_cpp_python

---
 .../tasks/src/model-libraries-snippets.ts | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts
index 523d1a2457..72f34c1cf3 100644
--- a/packages/tasks/src/model-libraries-snippets.ts
+++ b/packages/tasks/src/model-libraries-snippets.ts
@@ -418,23 +418,36 @@ model = keras_hub.models.CausalLM.from_preset("hf://${model.id}", dtype="bfloat1
 `,
 ];
 
-export const llama_cpp_python = (model: ModelData): string[] => [
-	`from llama_cpp import Llama
+export const llama_cpp_python = (model: ModelData): string[] => {
+	let snippet = `from llama_cpp import Llama
 
 llm = Llama.from_pretrained(
 	repo_id="${model.id}",
 	filename="{{GGUF_FILE}}",
 )
 
-llm.create_chat_completion(
+`;
+
+	if (model.tags.includes("conversational")) {
+		snippet += `llm.create_chat_completion(
 	messages = [
 		{
 			"role": "user",
 			"content": "What is the capital of France?"
 		}
 	]
-)`,
-];
+)`;
+	} else {
+		snippet += `output = llm(
+	"Once upon a time ",
+	max_tokens=512,
+	echo=True
+)
+print(output)`;
+	}
+
+	return [snippet];
+};
 
 export const tf_keras = (model: ModelData): string[] => [
 	`# Note: 'keras<3.x' or 'tf_keras' must be installed (legacy)

From c94a98646ecb188a9f8ceb6fe1006817b7212853 Mon Sep 17 00:00:00 2001
From: Mishig
Date: Mon, 7 Oct 2024 13:29:19 +0200
Subject: [PATCH 03/17] Apply suggestions from code review

Co-authored-by: vb
---
 packages/tasks/src/local-apps.ts               | 4 ++--
 packages/tasks/src/model-libraries-snippets.ts | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index 245bcf729e..1e90a51ae3 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -98,7 +98,7 @@ const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[]
 			`${binary} \\`,
 			`	--hf-repo "${model.id}" \\`,
 			`	--hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`,
-			`	-p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time "}"`,
+			`	-p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time"}"`,
 		].join("\n");
 		if (model.tags.includes("conversational")) {
 			snippet += " \\\n	--conversation";
@@ -199,7 +199,7 @@ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
 		`	-H "Content-Type: application/json" \\`,
 		`	--data '{`,
 		`		"model": "${model.id}",`,
-		`		"prompt": "Once upon a time ",`,
+		`		"prompt": "Once upon a time",`,
 		`		"max_tokens": 512,`,
 		`		"temperature": 0.5`,
 		`	}'`,
diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts
index 72f34c1cf3..a0ed3c4c50 100644
--- a/packages/tasks/src/model-libraries-snippets.ts
+++ b/packages/tasks/src/model-libraries-snippets.ts
@@ -439,7 +439,7 @@ llm = Llama.from_pretrained(
 )`;
 	} else {
 		snippet += `output = llm(
-	"Once upon a time ",
+	"Once upon a time",
 	max_tokens=512,
 	echo=True
 )

From f61ac3c24d69d819cd9a50804e8d3bb48588b99c Mon Sep 17 00:00:00 2001
From: Mishig
Date: Mon, 7 Oct 2024 13:34:44 +0200
Subject: [PATCH 04/17] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Victor Muštar
---
 packages/tasks/src/local-apps.ts               | 2 +-
 packages/tasks/src/model-libraries-snippets.ts | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index 1e90a51ae3..bd906e274a 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -199,7 +199,7 @@ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
 		`	-H "Content-Type: application/json" \\`,
 		`	--data '{`,
 		`		"model": "${model.id}",`,
-		`		"prompt": "Once upon a time",`,
+		`		"prompt": "Once upon a time,",`,
 		`		"max_tokens": 512,`,
 		`		"temperature": 0.5`,
 		`	}'`,
diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts
index a0ed3c4c50..d4505cd9a6 100644
--- a/packages/tasks/src/model-libraries-snippets.ts
+++ b/packages/tasks/src/model-libraries-snippets.ts
@@ -439,7 +439,7 @@ llm = Llama.from_pretrained(
 )`;
 	} else {
 		snippet += `output = llm(
-	"Once upon a time",
+	"Once upon a time,",
 	max_tokens=512,
 	echo=True
 )

From bd09de1e4b5ce24ff967cb4d3ad2ce98b2849ee6 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 7 Oct 2024 16:09:24 +0200
Subject: [PATCH 05/17] Add test cases

---
 packages/tasks/package.json                   |  5 +-
 packages/tasks/src/local-apps.spec.ts         | 82 +++++++++++++++++++
 .../src/model-libraries-snippets.spec.ts      | 53 ++++++++++++
 3 files changed, 139 insertions(+), 1 deletion(-)
 create mode 100644 packages/tasks/src/local-apps.spec.ts
 create mode 100644 packages/tasks/src/model-libraries-snippets.spec.ts

diff --git a/packages/tasks/package.json b/packages/tasks/package.json
index e9e64c0bf8..3ada081d9e 100644
--- a/packages/tasks/package.json
+++ b/packages/tasks/package.json
@@ -33,7 +33,10 @@
 		"watch": "npm-run-all --parallel watch:esm watch:cjs",
 		"prepare": "pnpm run build",
 		"check": "tsc",
-		"test": "vitest run"
+		"test": "vitest run",
+		"inference-codegen": "tsx scripts/inference-codegen.ts && prettier --write src/tasks/*/inference.ts",
+		"inference-tgi-import": "tsx scripts/inference-tgi-import.ts && prettier --write src/tasks/text-generation/spec/*.json && prettier --write src/tasks/chat-completion/spec/*.json",
+		"inference-tei-import": "tsx scripts/inference-tei-import.ts && prettier --write src/tasks/feature-extraction/spec/*.json"
 	},
 	"type": "module",
 	"files": [
diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
new file mode 100644
index 0000000000..af71e317c0
--- /dev/null
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -0,0 +1,82 @@
+import { describe, expect, it } from "vitest";
+import { LOCAL_APPS } from "./local-apps";
+import type { ModelData } from "./model-data";
+
+describe("local-apps", () => {
+	it("llama.cpp conversational", async () => {
+		const { snippet: snippetFunc } = LOCAL_APPS["llama.cpp"];
+		const model: ModelData = {
+			id: "mlabonne/gemma-2b-it-GGUF",
+			tags: ["conversational"],
+			inference: "",
+		};
+		const snippet = snippetFunc(model);
+
+		expect(snippet[0].content).toEqual(`# Load and run the model:
+llama-cli \\
+	--hf-repo "mlabonne/gemma-2b-it-GGUF" \\
+	--hf-file {{GGUF_FILE}} \\
+	-p "You are a helpful assistant" \\
+	--conversation`);
+	});
+
+	it("llama.cpp non-conversational", async () => {
+		const { snippet: snippetFunc } = LOCAL_APPS["llama.cpp"];
+		const model: ModelData = {
+			id: "mlabonne/gemma-2b-GGUF",
+			tags: [],
+			inference: "",
+		};
+		const snippet = snippetFunc(model);
+
+		expect(snippet[0].content).toEqual(`# Load and run the model:
+llama-cli \\
+	--hf-repo "mlabonne/gemma-2b-GGUF" \\
+	--hf-file {{GGUF_FILE}} \\
+	-p "Once upon a time"`);
+	});
+
+	it("vLLM conversational", async () => {
+		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
+		const model: ModelData = {
+			id: "meta-llama/Llama-3.2-3B-Instruct",
+			tags: ["conversational"],
+			inference: "",
+		};
+		const snippet = snippetFunc(model);
+
+		expect((snippet[0].content as string[]).join("\n")).toEqual(`# Load and run the model:
+vllm serve "meta-llama/Llama-3.2-3B-Instruct"
+# Call the server using curl:
+curl -X POST "http://localhost:8000/v1/chat/completions" \\
+	-H "Content-Type: application/json" \\
+	--data '{
+		"model": "meta-llama/Llama-3.2-3B-Instruct",
+		"messages": [
+			{"role": "user", "content": "Hello!"}
+		]
+	}'`);
+	});
+
+	it("vLLM non-conversational", async () => {
+		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
+		const model: ModelData = {
+			id: "meta-llama/Llama-3.2-3B",
+			tags: [""],
+			inference: "",
+		};
+		const snippet = snippetFunc(model);
+
+		expect((snippet[0].content as string[]).join("\n")).toEqual(`# Load and run the model:
+vllm serve "meta-llama/Llama-3.2-3B"
+# Call the server using curl:
+curl -X POST "http://localhost:8000/v1/completions" \\
+	-H "Content-Type: application/json" \\
+	--data '{
+		"model": "meta-llama/Llama-3.2-3B",
+		"prompt": "Once upon a time,",
+		"max_tokens": 512,
+		"temperature": 0.5
+	}'`);
+	});
+});
diff --git a/packages/tasks/src/model-libraries-snippets.spec.ts b/packages/tasks/src/model-libraries-snippets.spec.ts
new file mode 100644
index 0000000000..d1bce3fe16
--- /dev/null
+++ b/packages/tasks/src/model-libraries-snippets.spec.ts
@@ -0,0 +1,53 @@
+import { describe, expect, it } from "vitest";
+import type { ModelData } from "./model-data";
+import { llama_cpp_python } from "./model-libraries-snippets";
+
+describe("model-libraries-snippets", () => {
+	it("llama_cpp_python conversational", async () => {
+		const model: ModelData = {
+			id: "mlabonne/gemma-2b-it-GGUF",
+			tags: ["conversational"],
+			inference: "",
+		};
+		const snippet = llama_cpp_python(model);
+
+		expect(snippet.join("\n")).toEqual(`from llama_cpp import Llama
+
+llm = Llama.from_pretrained(
+	repo_id="mlabonne/gemma-2b-it-GGUF",
+	filename="{{GGUF_FILE}}",
+)
+
+llm.create_chat_completion(
+	messages = [
+		{
+			"role": "user",
+			"content": "What is the capital of France?"
+		}
+	]
+)`);
+	});
+
+	it("llama_cpp_python non-conversational", async () => {
+		const model: ModelData = {
+			id: "mlabonne/gemma-2b-it-GGUF",
+			tags: [""],
+			inference: "",
+		};
+		const snippet = llama_cpp_python(model);
+
+		expect(snippet.join("\n")).toEqual(`from llama_cpp import Llama
+
+llm = Llama.from_pretrained(
+	repo_id="mlabonne/gemma-2b-it-GGUF",
+	filename="{{GGUF_FILE}}",
+)
+
+output = llm(
+	"Once upon a time,",
+	max_tokens=512,
+	echo=True
+)
+print(output)`);
+	});
+});

From 968bc02e5e66eb5a05a12021f2f47496358ed8b8 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 7 Oct 2024 16:14:02 +0200
Subject: [PATCH 06/17] prefer to use array const

---
 packages/tasks/src/local-apps.ts               |  9 +++++----
 packages/tasks/src/model-libraries-snippets.ts | 17 +++++++++--------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index bd906e274a..8a6a9528eb 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -93,17 +93,18 @@ function isMlxModel(model: ModelData) {
 
 const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
 	const command = (binary: string) => {
-		let snippet = [
+		const snippet = [
 			"# Load and run the model:",
 			`${binary} \\`,
 			`	--hf-repo "${model.id}" \\`,
 			`	--hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`,
 			`	-p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time"}"`,
-		].join("\n");
+		];
 		if (model.tags.includes("conversational")) {
-			snippet += " \\\n	--conversation";
+			snippet[snippet.length - 1] += " \\";
+			snippet.push("	--conversation");
 		}
-		return snippet;
+		return snippet.join("\n");
 	};
 	return [
 		{
diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts
index d4505cd9a6..9b51878755 100644
--- a/packages/tasks/src/model-libraries-snippets.ts
+++ b/packages/tasks/src/model-libraries-snippets.ts
@@ -419,34 +419,35 @@ model = keras_hub.models.CausalLM.from_preset("hf://${model.id}", dtype="bfloat1
 ];
 
 export const llama_cpp_python = (model: ModelData): string[] => {
-	let snippet = `from llama_cpp import Llama
+	const snippets = [
+		`from llama_cpp import Llama
 
 llm = Llama.from_pretrained(
 	repo_id="${model.id}",
 	filename="{{GGUF_FILE}}",
 )
-
-`;
+`,
+	];
 
 	if (model.tags.includes("conversational")) {
-		snippet += `llm.create_chat_completion(
+		snippets.push(`llm.create_chat_completion(
 	messages = [
 		{
 			"role": "user",
 			"content": "What is the capital of France?"
 		}
 	]
-)`;
+)`);
 	} else {
-		snippet += `output = llm(
+		snippets.push(`output = llm(
 	"Once upon a time,",
 	max_tokens=512,
 	echo=True
 )
-print(output)`;
+print(output)`);
 	}
 
-	return [snippet];
+	return snippets;
 };

From 524e965051e8e216a8783b74875b558d7d2c4756 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 7 Oct 2024 16:16:29 +0200
Subject: [PATCH 07/17] real examples

---
 packages/tasks/src/local-apps.spec.ts               | 4 ++--
 packages/tasks/src/model-libraries-snippets.spec.ts | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
index af71e317c0..b0e4590299 100644
--- a/packages/tasks/src/local-apps.spec.ts
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -6,7 +6,7 @@ describe("local-apps", () => {
 	it("llama.cpp conversational", async () => {
 		const { snippet: snippetFunc } = LOCAL_APPS["llama.cpp"];
 		const model: ModelData = {
-			id: "mlabonne/gemma-2b-it-GGUF",
+			id: "bartowski/Llama-3.2-3B-Instruct-GGUF",
 			tags: ["conversational"],
 			inference: "",
 		};
@@ -14,7 +14,7 @@ describe("local-apps", () => {
 
 		expect(snippet[0].content).toEqual(`# Load and run the model:
 llama-cli \\
-	--hf-repo "mlabonne/gemma-2b-it-GGUF" \\
+	--hf-repo "bartowski/Llama-3.2-3B-Instruct-GGUF" \\
 	--hf-file {{GGUF_FILE}} \\
 	-p "You are a helpful assistant" \\
 	--conversation`);
diff --git a/packages/tasks/src/model-libraries-snippets.spec.ts b/packages/tasks/src/model-libraries-snippets.spec.ts
index d1bce3fe16..b0f2f8949e 100644
--- a/packages/tasks/src/model-libraries-snippets.spec.ts
+++ b/packages/tasks/src/model-libraries-snippets.spec.ts
@@ -5,7 +5,7 @@ import { llama_cpp_python } from "./model-libraries-snippets";
 describe("model-libraries-snippets", () => {
 	it("llama_cpp_python conversational", async () => {
 		const model: ModelData = {
-			id: "mlabonne/gemma-2b-it-GGUF",
+			id: "bartowski/Llama-3.2-3B-Instruct-GGUF",
 			tags: ["conversational"],
 			inference: "",
 		};
@@ -14,7 +14,7 @@ describe("model-libraries-snippets", () => {
 		expect(snippet.join("\n")).toEqual(`from llama_cpp import Llama
 
 llm = Llama.from_pretrained(
-	repo_id="mlabonne/gemma-2b-it-GGUF",
+	repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
 	filename="{{GGUF_FILE}}",
 )
 
@@ -30,7 +30,7 @@ llm.create_chat_completion(
 
 	it("llama_cpp_python non-conversational", async () => {
 		const model: ModelData = {
-			id: "mlabonne/gemma-2b-it-GGUF",
+			id: "mlabonne/gemma-2b-GGUF",
 			tags: [""],
 			inference: "",
 		};
@@ -39,7 +39,7 @@ llm.create_chat_completion(
 		expect(snippet.join("\n")).toEqual(`from llama_cpp import Llama
 
 llm = Llama.from_pretrained(
-	repo_id="mlabonne/gemma-2b-it-GGUF",
+	repo_id="mlabonne/gemma-2b-GGUF",
 	filename="{{GGUF_FILE}}",
 )
 

From de46212188857c6850863897d4f604f2741a2b6a Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 7 Oct 2024 16:17:58 +0200
Subject: [PATCH 08/17] "once upon a time," example

---
 packages/tasks/src/local-apps.spec.ts | 2 +-
 packages/tasks/src/local-apps.ts      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
index b0e4590299..9c8c82a2aa 100644
--- a/packages/tasks/src/local-apps.spec.ts
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -33,7 +33,7 @@ llama-cli \\
 llama-cli \\
 	--hf-repo "mlabonne/gemma-2b-GGUF" \\
 	--hf-file {{GGUF_FILE}} \\
-	-p "Once upon a time"`);
+	-p "Once upon a time,"`);
 	});
 
 	it("vLLM conversational", async () => {
diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index 8a6a9528eb..d00cf1ae7f 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -98,7 +98,7 @@ const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[]
 			`${binary} \\`,
 			`	--hf-repo "${model.id}" \\`,
 			`	--hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`,
-			`	-p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time"}"`,
+			`	-p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time,"}"`,
 		];
 		if (model.tags.includes("conversational")) {
 			snippet[snippet.length - 1] += " \\";

From 6761b2062f3b582ff120d5e42623ac2b8e155c97 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 11:48:34 +0100
Subject: [PATCH 09/17] fix rebase

---
 packages/tasks/package.json | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/packages/tasks/package.json b/packages/tasks/package.json
index 3ada081d9e..e9e64c0bf8 100644
--- a/packages/tasks/package.json
+++ b/packages/tasks/package.json
@@ -33,10 +33,7 @@
 		"watch": "npm-run-all --parallel watch:esm watch:cjs",
 		"prepare": "pnpm run build",
 		"check": "tsc",
-		"test": "vitest run",
-		"inference-codegen": "tsx scripts/inference-codegen.ts && prettier --write src/tasks/*/inference.ts",
-		"inference-tgi-import": "tsx scripts/inference-tgi-import.ts && prettier --write src/tasks/text-generation/spec/*.json && prettier --write src/tasks/chat-completion/spec/*.json",
-		"inference-tei-import": "tsx scripts/inference-tei-import.ts && prettier --write src/tasks/feature-extraction/spec/*.json"
+		"test": "vitest run"
 	},
 	"type": "module",
 	"files": [

From 04e8f0cb2f1c8bfb72357d71d71788e844243e87 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 11:53:21 +0100
Subject: [PATCH 10/17] fix imports

---
 packages/tasks/src/local-apps.spec.ts               | 4 ++--
 packages/tasks/src/model-libraries-snippets.spec.ts | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
index 9c8c82a2aa..b6ebca333c 100644
--- a/packages/tasks/src/local-apps.spec.ts
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from "vitest";
-import { LOCAL_APPS } from "./local-apps";
-import type { ModelData } from "./model-data";
+import { LOCAL_APPS } from "./local-apps.js";
+import type { ModelData } from "./model-data.js";
 
 describe("local-apps", () => {
 	it("llama.cpp conversational", async () => {
diff --git a/packages/tasks/src/model-libraries-snippets.spec.ts b/packages/tasks/src/model-libraries-snippets.spec.ts
index b0f2f8949e..7ca049f3ff 100644
--- a/packages/tasks/src/model-libraries-snippets.spec.ts
+++ b/packages/tasks/src/model-libraries-snippets.spec.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from "vitest";
-import type { ModelData } from "./model-data";
-import { llama_cpp_python } from "./model-libraries-snippets";
+import type { ModelData } from "./model-data.js";
+import { llama_cpp_python } from "./model-libraries-snippets.js";
 
 describe("model-libraries-snippets", () => {
 	it("llama_cpp_python conversational", async () => {

From d4e7fcd4436a969f2acb8984bfdbe3e35e4ab9ce Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 12:16:52 +0100
Subject: [PATCH 11/17] simplify strings

---
 packages/tasks/src/local-apps.ts | 45 +++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index d00cf1ae7f..f54864720f 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -183,34 +183,31 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[]
 };
 
 const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
-	const runCommandInstruct = [
-		"# Call the server using curl:",
-		`curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
-		`	-H "Content-Type: application/json" \\`,
-		`	--data '{`,
-		`		"model": "${model.id}",`,
-		`		"messages": [`,
-		`			{"role": "user", "content": "Hello!"}`,
-		`		]`,
-		`	}'`,
-	];
-	const runCommandNonInstruct = [
-		"# Call the server using curl:",
-		`curl -X POST "http://localhost:8000/v1/completions" \\`,
-		`	-H "Content-Type: application/json" \\`,
-		`	--data '{`,
-		`		"model": "${model.id}",`,
-		`		"prompt": "Once upon a time,",`,
-		`		"max_tokens": 512,`,
-		`		"temperature": 0.5`,
-		`	}'`,
-	];
+	// todo: lets get the messages here dawg
+	const runCommandInstruct = `# Call the server using curl:
+curl -X POST "http://localhost:8000/v1/chat/completions" \\
+	-H "Content-Type: application/json" \\
+	--data '{
+		"model": "${model.id}",
+		"messages": [
+			{"role": "user", "content": "Hello!"}
+		]
+	}'`;
+	const runCommandNonInstruct = `# Call the server using curl:
+curl -X POST "http://localhost:8000/v1/completions" \\
+	-H "Content-Type: application/json" \\
+	--data '{
+		"model": "${model.id}",
+		"prompt": "Once upon a time,",
+		"max_tokens": 512,
+		"temperature": 0.5
+	}'`;
 	const runCommand = model.tags.includes("conversational") ? runCommandInstruct : runCommandNonInstruct;
 	return [
 		{
 			title: "Install from pip",
 			setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
-			content: [`# Load and run the model:\nvllm serve "${model.id}"`, runCommand.join("\n")],
+			content: [`# Load and run the model:\nvllm serve "${model.id}"`, runCommand],
 		},
 		{
 			title: "Use Docker images",
@@ -227,7 +224,7 @@ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
 			].join("\n"),
 			content: [
 				`# Load and run the model:\ndocker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`,
-				runCommand.join("\n"),
+				runCommand,
 			],
 		},
 	];

From 0485bd1f950df7d31f69d199db9d87f053c4ac02 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 12:16:57 +0100
Subject: [PATCH 12/17] format

---
 packages/tasks/src/index.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/packages/tasks/src/index.ts b/packages/tasks/src/index.ts
index f11661a134..c350fecd2e 100644
--- a/packages/tasks/src/index.ts
+++ b/packages/tasks/src/index.ts
@@ -51,7 +51,6 @@ export * from "./gguf.js";
 export { snippets };
 export type { InferenceSnippet } from "./snippets/index.js";
 
-
 export { SKUS, DEFAULT_MEMORY_OPTIONS } from "./hardware.js";
 export type { HardwareSpec, SkuType } from "./hardware.js";
 export { LOCAL_APPS } from "./local-apps.js";

From 2e7c0808ca75a10222d4fc9bcdda1fff7660aa19 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 12:25:39 +0100
Subject: [PATCH 13/17] use shared example message

---
 packages/tasks/src/local-apps.spec.ts |  6 +++++-
 packages/tasks/src/local-apps.ts      | 13 +++++++++----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
index b6ebca333c..a1e1752712 100644
--- a/packages/tasks/src/local-apps.spec.ts
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -40,6 +40,7 @@ llama-cli \\
 		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
 		const model: ModelData = {
 			id: "meta-llama/Llama-3.2-3B-Instruct",
+			pipeline_tag: "text-generation",
 			tags: ["conversational"],
 			inference: "",
 		};
@@ -53,7 +54,10 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
 	--data '{
 		"model": "meta-llama/Llama-3.2-3B-Instruct",
 		"messages": [
-			{"role": "user", "content": "Hello!"}
+			{
+				"role": "user",
+				"content": "What is the capital of France?"
+			}
 		]
 	}'`);
 	});
diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index f54864720f..a1d2c119e1 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -1,6 +1,9 @@
 import { parseGGUFQuantLabel } from "./gguf.js";
 import type { ModelData } from "./model-data.js";
 import type { PipelineType } from "./pipelines.js";
+import { stringifyMessages } from "./snippets/common.js";
+import { getModelInputSnippet } from "./snippets/inputs.js";
+import { ChatCompletionInputMessage } from "./tasks/index.js";
 
 export interface LocalAppSnippet {
 	/**
@@ -186,15 +189,17 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[]
 };
 
 const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
-	// todo: lets get the messages here dawg
+	const messages = getModelInputSnippet(model) as ChatCompletionInputMessage[];
 	const runCommandInstruct = `# Call the server using curl:
 curl -X POST "http://localhost:8000/v1/chat/completions" \\
 	-H "Content-Type: application/json" \\
 	--data '{
 		"model": "${model.id}",
-		"messages": [
-			{"role": "user", "content": "Hello!"}
-		]
+		"messages": ${stringifyMessages(messages, {
+			indent: "\t\t",
+			attributeKeyQuotes: true,
+			customContentEscaper: (str) => str.replace(/'/g, "'\\''"),
+		})}
 	}'`;
 	const runCommandNonInstruct = `# Call the server using curl:
 curl -X POST "http://localhost:8000/v1/completions" \\

From 31632a19dd00126135861951be140ff6eabf8bb2 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 14:00:45 +0100
Subject: [PATCH 14/17] vLLM VLM snippet support

---
 packages/tasks/src/local-apps.spec.ts | 37 +++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
index a1e1752712..8fc56cbe59 100644
--- a/packages/tasks/src/local-apps.spec.ts
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -83,4 +83,41 @@ curl -X POST "http://localhost:8000/v1/completions" \\
 		"temperature": 0.5
 	}'`);
 	});
+
+	it("vLLM VLM conversational", async () => {
+		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
+		const model: ModelData = {
+			id: "meta-llama/Llama-3.2-11B-Vision-Instruct",
+			pipeline_tag: "image-text-to-text",
+			tags: ["conversational"],
+			inference: "",
+		};
+		const snippet = snippetFunc(model);
+
+		expect((snippet[0].content as string[]).join("\n")).toEqual(`# Load and run the model:
+vllm serve "meta-llama/Llama-3.2-11B-Vision-Instruct"
+# Call the server using curl:
+curl -X POST "http://localhost:8000/v1/chat/completions" \\
+	-H "Content-Type: application/json" \\
+	--data '{
+		"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
+		"messages": [
+			{
+				"role": "user",
+				"content": [
+					{
+						"type": "text",
+						"text": "Describe this image in one sentence."
+					},
+					{
+						"type": "image_url",
+						"image_url": {
+							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+						}
+					}
+				]
+			}
+		]
+	}'`);
+	});
 });

From d6c5b5b597732253a58c58c2912479fc21d91b6c Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 14:06:06 +0100
Subject: [PATCH 15/17] match naming

---
 packages/tasks/src/local-apps.spec.ts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
index 8fc56cbe59..23806f668d 100644
--- a/packages/tasks/src/local-apps.spec.ts
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -36,7 +36,7 @@ llama-cli \\
 	-p "Once upon a time,"`);
 	});
 
-	it("vLLM conversational", async () => {
+	it("vLLM conversational llm", async () => {
 		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
 		const model: ModelData = {
 			id: "meta-llama/Llama-3.2-3B-Instruct",
@@ -62,7 +62,7 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
 	}'`);
 	});
 
-	it("vLLM non-conversational", async () => {
+	it("vLLM non-conversational llm", async () => {
 		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
 		const model: ModelData = {
 			id: "meta-llama/Llama-3.2-3B",
@@ -84,7 +84,7 @@ curl -X POST "http://localhost:8000/v1/completions" \\
 	}'`);
 	});
 
-	it("vLLM VLM conversational", async () => {
+	it("vLLM conversational vlm", async () => {
 		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
 		const model: ModelData = {
 			id: "meta-llama/Llama-3.2-11B-Vision-Instruct",

From fa745ebece6d3f5c1cf5dbc518cfabb3dfbc9365 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Wed, 20 Nov 2024 10:39:58 +0100
Subject: [PATCH 16/17] llama_cpp_python use same snippet

---
 packages/tasks/src/model-libraries-snippets.spec.ts |  1 +
 packages/tasks/src/model-libraries-snippets.ts      | 11 +++++------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/packages/tasks/src/model-libraries-snippets.spec.ts b/packages/tasks/src/model-libraries-snippets.spec.ts
index 7ca049f3ff..fa87d82423 100644
--- a/packages/tasks/src/model-libraries-snippets.spec.ts
+++ b/packages/tasks/src/model-libraries-snippets.spec.ts
@@ -6,6 +6,7 @@ describe("model-libraries-snippets", () => {
 	it("llama_cpp_python conversational", async () => {
 		const model: ModelData = {
 			id: "bartowski/Llama-3.2-3B-Instruct-GGUF",
+			pipeline_tag: "text-generation",
 			tags: ["conversational"],
 			inference: "",
 		};
diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts
index 9b51878755..a9ef6e4657 100644
--- a/packages/tasks/src/model-libraries-snippets.ts
+++ b/packages/tasks/src/model-libraries-snippets.ts
@@ -1,6 +1,9 @@
 import type { ModelData } from "./model-data.js";
 import type { WidgetExampleTextInput, WidgetExampleSentenceSimilarityInput } from "./widget-example.js";
 import { LIBRARY_TASK_MAPPING } from "./library-to-tasks.js";
+import { getModelInputSnippet } from "./snippets/inputs.js";
+import { ChatCompletionInputMessage } from "./tasks/index.js";
+import { stringifyMessages } from "./snippets/common.js";
 
 const TAG_CUSTOM_CODE = "custom_code";
 
@@ -430,13 +433,9 @@ llm = Llama.from_pretrained(
 	];
 
 	if (model.tags.includes("conversational")) {
+		const messages = getModelInputSnippet(model) as ChatCompletionInputMessage[];
 		snippets.push(`llm.create_chat_completion(
-	messages = [
-		{
-			"role": "user",
-			"content": "What is the capital of France?"
-		}
-	]
+	messages = ${stringifyMessages(messages, { attributeKeyQuotes: true, indent: "\t" })}
 )`);
 	} else {
 		snippets.push(`output = llm(

From 0e7cf591a48e501528555301c203e6cd6e5573dc Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Wed, 20 Nov 2024 10:42:00 +0100
Subject: [PATCH 17/17] lint

---
 packages/tasks/src/local-apps.ts               | 2 +-
 packages/tasks/src/model-libraries-snippets.ts | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index a1d2c119e1..2249183a4c 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -3,7 +3,7 @@ import type { ModelData } from "./model-data.js";
 import type { PipelineType } from "./pipelines.js";
 import { stringifyMessages } from "./snippets/common.js";
 import { getModelInputSnippet } from "./snippets/inputs.js";
-import { ChatCompletionInputMessage } from "./tasks/index.js";
+import type { ChatCompletionInputMessage } from "./tasks/index.js";
 
 export interface LocalAppSnippet {
 	/**
diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts
index a9ef6e4657..bb6ac12c3c 100644
--- a/packages/tasks/src/model-libraries-snippets.ts
+++ b/packages/tasks/src/model-libraries-snippets.ts
@@ -2,7 +2,7 @@ import type { ModelData } from "./model-data.js";
 import type { WidgetExampleTextInput, WidgetExampleSentenceSimilarityInput } from "./widget-example.js";
 import { LIBRARY_TASK_MAPPING } from "./library-to-tasks.js";
 import { getModelInputSnippet } from "./snippets/inputs.js";
-import { ChatCompletionInputMessage } from "./tasks/index.js";
+import type { ChatCompletionInputMessage } from "./tasks/index.js";
 import { stringifyMessages } from "./snippets/common.js";
 
 const TAG_CUSTOM_CODE = "custom_code";