From 8239d41d04081c1130a1eb4d937dcee46452d844 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 7 Oct 2024 11:39:33 +0200
Subject: [PATCH 01/17] [Local App Snippet] support non-conversational LLMs

---
 packages/tasks/src/local-apps.ts | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index edc7e64fd8..245bcf729e 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -92,15 +92,19 @@ function isMlxModel(model: ModelData) {
 }
 
 const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
-	const command = (binary: string) =>
-		[
+	const command = (binary: string) => {
+		let snippet = [
 			"# Load and run the model:",
 			`${binary} \\`,
 			`	--hf-repo "${model.id}" \\`,
 			`	--hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`,
-			'	-p "You are a helpful assistant" \\',
-			"	--conversation",
+			`	-p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time "}"`,
 		].join("\n");
+		if (model.tags.includes("conversational")) {
+			snippet += " \\\n	--conversation";
+		}
+		return snippet;
+	};
 	return [
 		{
 			title: "Install from brew",
@@ -178,7 +182,7 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[]
 };
 
 const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
-	const runCommand = [
+	const runCommandInstruct = [
 		"# Call the server using curl:",
 		`curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
 		`	-H "Content-Type: application/json" \\`,
@@ -189,6 +193,18 @@ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
 		`		]`,
 		`	}'`,
 	];
+	const runCommandNonInstruct = [
+		"# Call the server using curl:",
+		`curl -X POST "http://localhost:8000/v1/completions" \\`,
+		`	-H "Content-Type: application/json" \\`,
+		`	--data '{`,
+		`		"model": "${model.id}",`,
+		`		"prompt": "Once upon a time ",`,
+		`		"max_tokens": 512,`,
+		`		"temperature": 0.5`,
+		`	}'`,
+	];
+	const runCommand = model.tags.includes("conversational") ? runCommandInstruct : runCommandNonInstruct;
 	return [
 		{
 			title: "Install from pip",

From 36761d56c77e024b081e797a23cdd4705dca1c17 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 7 Oct 2024 12:07:20 +0200
Subject: [PATCH 02/17] llama_cpp_python

---
 .../tasks/src/model-libraries-snippets.ts | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts
index 523d1a2457..72f34c1cf3 100644
--- a/packages/tasks/src/model-libraries-snippets.ts
+++ b/packages/tasks/src/model-libraries-snippets.ts
@@ -418,23 +418,36 @@ model = keras_hub.models.CausalLM.from_preset("hf://${model.id}", dtype="bfloat1
 `,
 ];
 
-export const llama_cpp_python = (model: ModelData): string[] => [
-	`from llama_cpp import Llama
+export const llama_cpp_python = (model: ModelData): string[] => {
+	let snippet = `from llama_cpp import Llama
 
 llm = Llama.from_pretrained(
 	repo_id="${model.id}",
 	filename="{{GGUF_FILE}}",
 )
 
-llm.create_chat_completion(
+`;
+
+	if (model.tags.includes("conversational")) {
+		snippet += `llm.create_chat_completion(
 	messages = [
 		{
 			"role": "user",
 			"content": "What is the capital of France?"
 		}
 	]
-)`,
-];
+)`;
+	} else {
+		snippet += `output = llm(
+	"Once upon a time ",
+	max_tokens=512,
+	echo=True
+)
+print(output)`;
+	}
+
+	return [snippet];
+};
 
 export const tf_keras = (model: ModelData): string[] => [
 	`# Note: 'keras<3.x' or 'tf_keras' must be installed (legacy)

From c94a98646ecb188a9f8ceb6fe1006817b7212853 Mon Sep 17 00:00:00 2001
From: Mishig
Date: Mon, 7 Oct 2024 13:29:19 +0200
Subject: [PATCH 03/17] Apply suggestions from code review

Co-authored-by: vb
---
 packages/tasks/src/local-apps.ts               | 4 ++--
 packages/tasks/src/model-libraries-snippets.ts | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index 245bcf729e..1e90a51ae3 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -98,7 +98,7 @@ const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[]
 			`${binary} \\`,
 			`	--hf-repo "${model.id}" \\`,
 			`	--hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`,
-			`	-p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time "}"`,
+			`	-p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time"}"`,
 		].join("\n");
 		if (model.tags.includes("conversational")) {
 			snippet += " \\\n	--conversation";
@@ -199,7 +199,7 @@ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
 		`	-H "Content-Type: application/json" \\`,
 		`	--data '{`,
 		`		"model": "${model.id}",`,
-		`		"prompt": "Once upon a time ",`,
+		`		"prompt": "Once upon a time",`,
 		`		"max_tokens": 512,`,
 		`		"temperature": 0.5`,
 		`	}'`,
diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts
index 72f34c1cf3..a0ed3c4c50 100644
--- a/packages/tasks/src/model-libraries-snippets.ts
+++ b/packages/tasks/src/model-libraries-snippets.ts
@@ -439,7 +439,7 @@ llm = Llama.from_pretrained(
 )`;
 	} else {
 		snippet += `output = llm(
-	"Once upon a time ",
+	"Once upon a time",
 	max_tokens=512,
 	echo=True
 )

From f61ac3c24d69d819cd9a50804e8d3bb48588b99c Mon Sep 17 00:00:00 2001
From: Mishig
Date: Mon, 7 Oct 2024 13:34:44 +0200
Subject: [PATCH 04/17] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Victor Muštar
---
 packages/tasks/src/local-apps.ts               | 2 +-
 packages/tasks/src/model-libraries-snippets.ts | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index 1e90a51ae3..bd906e274a 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -199,7 +199,7 @@ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
 		`	-H "Content-Type: application/json" \\`,
 		`	--data '{`,
 		`		"model": "${model.id}",`,
-		`		"prompt": "Once upon a time",`,
+		`		"prompt": "Once upon a time,",`,
 		`		"max_tokens": 512,`,
 		`		"temperature": 0.5`,
 		`	}'`,
diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts
index a0ed3c4c50..d4505cd9a6 100644
--- a/packages/tasks/src/model-libraries-snippets.ts
+++ b/packages/tasks/src/model-libraries-snippets.ts
@@ -439,7 +439,7 @@ llm = Llama.from_pretrained(
 )`;
 	} else {
 		snippet += `output = llm(
-	"Once upon a time",
+	"Once upon a time,",
 	max_tokens=512,
 	echo=True
 )

From bd09de1e4b5ce24ff967cb4d3ad2ce98b2849ee6 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 7 Oct 2024 16:09:24 +0200
Subject: [PATCH 05/17] Add test cases

---
 packages/tasks/package.json                   |  5 +-
 packages/tasks/src/local-apps.spec.ts         | 82 +++++++++++++++++++
 .../src/model-libraries-snippets.spec.ts      | 53 ++++++++++++
 3 files changed, 139 insertions(+), 1 deletion(-)
 create mode 100644 packages/tasks/src/local-apps.spec.ts
 create mode 100644 packages/tasks/src/model-libraries-snippets.spec.ts

diff --git a/packages/tasks/package.json b/packages/tasks/package.json
index e9e64c0bf8..3ada081d9e 100644
--- a/packages/tasks/package.json
+++ b/packages/tasks/package.json
@@ -33,7 +33,10 @@
 		"watch": "npm-run-all --parallel watch:esm watch:cjs",
 		"prepare": "pnpm run build",
 		"check": "tsc",
-		"test": "vitest run"
+		"test": "vitest run",
+		"inference-codegen": "tsx scripts/inference-codegen.ts && prettier --write src/tasks/*/inference.ts",
+		"inference-tgi-import": "tsx scripts/inference-tgi-import.ts && prettier --write src/tasks/text-generation/spec/*.json && prettier --write src/tasks/chat-completion/spec/*.json",
+		"inference-tei-import": "tsx scripts/inference-tei-import.ts && prettier --write src/tasks/feature-extraction/spec/*.json"
 	},
 	"type": "module",
 	"files": [
diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
new file mode 100644
index 0000000000..af71e317c0
--- /dev/null
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -0,0 +1,82 @@
+import { describe, expect, it } from "vitest";
+import { LOCAL_APPS } from "./local-apps";
+import type { ModelData } from "./model-data";
+
+describe("local-apps", () => {
+	it("llama.cpp conversational", async () => {
+		const { snippet: snippetFunc } = LOCAL_APPS["llama.cpp"];
+		const model: ModelData = {
+			id: "mlabonne/gemma-2b-it-GGUF",
+			tags: ["conversational"],
+			inference: "",
+		};
+		const snippet = snippetFunc(model);
+
+		expect(snippet[0].content).toEqual(`# Load and run the model:
+llama-cli \\
+	--hf-repo "mlabonne/gemma-2b-it-GGUF" \\
+	--hf-file {{GGUF_FILE}} \\
+	-p "You are a helpful assistant" \\
+	--conversation`);
+	});
+
+	it("llama.cpp non-conversational", async () => {
+		const { snippet: snippetFunc } = LOCAL_APPS["llama.cpp"];
+		const model: ModelData = {
+			id: "mlabonne/gemma-2b-GGUF",
+			tags: [],
+			inference: "",
+		};
+		const snippet = snippetFunc(model);
+
+		expect(snippet[0].content).toEqual(`# Load and run the model:
+llama-cli \\
+	--hf-repo "mlabonne/gemma-2b-GGUF" \\
+	--hf-file {{GGUF_FILE}} \\
+	-p "Once upon a time"`);
+	});
+
+	it("vLLM conversational", async () => {
+		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
+		const model: ModelData = {
+			id: "meta-llama/Llama-3.2-3B-Instruct",
+			tags: ["conversational"],
+			inference: "",
+		};
+		const snippet = snippetFunc(model);
+
+		expect((snippet[0].content as string[]).join("\n")).toEqual(`# Load and run the model:
+vllm serve "meta-llama/Llama-3.2-3B-Instruct"
+# Call the server using curl:
+curl -X POST "http://localhost:8000/v1/chat/completions" \\
+	-H "Content-Type: application/json" \\
+	--data '{
+		"model": "meta-llama/Llama-3.2-3B-Instruct",
+		"messages": [
+			{"role": "user", "content": "Hello!"}
+		]
+	}'`);
+	});
+
+	it("vLLM non-conversational", async () => {
+		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
+		const model: ModelData = {
+			id: "meta-llama/Llama-3.2-3B",
+			tags: [""],
+			inference: "",
+		};
+		const snippet = snippetFunc(model);
+
+		expect((snippet[0].content as string[]).join("\n")).toEqual(`# Load and run the model:
+vllm serve "meta-llama/Llama-3.2-3B"
+# Call the server using curl:
+curl -X POST "http://localhost:8000/v1/completions" \\
+	-H "Content-Type: application/json" \\
+	--data '{
+		"model": "meta-llama/Llama-3.2-3B",
+		"prompt": "Once upon a time,",
+		"max_tokens": 512,
+		"temperature": 0.5
+	}'`);
+	});
+});
diff --git a/packages/tasks/src/model-libraries-snippets.spec.ts b/packages/tasks/src/model-libraries-snippets.spec.ts
new file mode 100644
index 0000000000..d1bce3fe16
--- /dev/null
+++ b/packages/tasks/src/model-libraries-snippets.spec.ts
@@ -0,0 +1,53 @@
+import { describe, expect, it } from "vitest";
+import type { ModelData } from "./model-data";
+import { llama_cpp_python } from "./model-libraries-snippets";
+
+describe("model-libraries-snippets", () => {
+	it("llama_cpp_python conversational", async () => {
+		const model: ModelData = {
+			id: "mlabonne/gemma-2b-it-GGUF",
+			tags: ["conversational"],
+			inference: "",
+		};
+		const snippet = llama_cpp_python(model);
+
+		expect(snippet.join("\n")).toEqual(`from llama_cpp import Llama
+
+llm = Llama.from_pretrained(
+	repo_id="mlabonne/gemma-2b-it-GGUF",
+	filename="{{GGUF_FILE}}",
+)
+
+llm.create_chat_completion(
+	messages = [
+		{
+			"role": "user",
+			"content": "What is the capital of France?"
+		}
+	]
+)`);
+	});
+
+	it("llama_cpp_python non-conversational", async () => {
+		const model: ModelData = {
+			id: "mlabonne/gemma-2b-it-GGUF",
+			tags: [""],
+			inference: "",
+		};
+		const snippet = llama_cpp_python(model);
+
+		expect(snippet.join("\n")).toEqual(`from llama_cpp import Llama
+
+llm = Llama.from_pretrained(
+	repo_id="mlabonne/gemma-2b-it-GGUF",
+	filename="{{GGUF_FILE}}",
+)
+
+output = llm(
+	"Once upon a time,",
+	max_tokens=512,
+	echo=True
+)
+print(output)`);
+	});
+});

From 968bc02e5e66eb5a05a12021f2f47496358ed8b8 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 7 Oct 2024 16:14:02 +0200
Subject: [PATCH 06/17] prefer to use array const

---
 packages/tasks/src/local-apps.ts               |  9 +++++----
 packages/tasks/src/model-libraries-snippets.ts | 17 +++++++++--------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index bd906e274a..8a6a9528eb 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -93,17 +93,18 @@ function isMlxModel(model: ModelData) {
 
 const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
 	const command = (binary: string) => {
-		let snippet = [
+		const snippet = [
 			"# Load and run the model:",
 			`${binary} \\`,
 			`	--hf-repo "${model.id}" \\`,
 			`	--hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`,
 			`	-p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time"}"`,
-		].join("\n");
+		];
 		if (model.tags.includes("conversational")) {
-			snippet += " \\\n	--conversation";
+			snippet[snippet.length - 1] += " \\";
+			snippet.push("	--conversation");
 		}
-		return snippet;
+		return snippet.join("\n");
 	};
 	return [
 		{
diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts
index d4505cd9a6..9b51878755 100644
--- a/packages/tasks/src/model-libraries-snippets.ts
+++ b/packages/tasks/src/model-libraries-snippets.ts
@@ -419,34 +419,35 @@ model = keras_hub.models.CausalLM.from_preset("hf://${model.id}", dtype="bfloat1
 ];
 
 export const llama_cpp_python = (model: ModelData): string[] => {
-	let snippet = `from llama_cpp import Llama
+	const snippets = [
+		`from llama_cpp import Llama
 
 llm = Llama.from_pretrained(
 	repo_id="${model.id}",
 	filename="{{GGUF_FILE}}",
 )
-
-`;
+`,
+	];
 
 	if (model.tags.includes("conversational")) {
-		snippet += `llm.create_chat_completion(
+		snippets.push(`llm.create_chat_completion(
 	messages = [
 		{
 			"role": "user",
 			"content": "What is the capital of France?"
 		}
 	]
-)`;
+)`);
 	} else {
-		snippet += `output = llm(
+		snippets.push(`output = llm(
 	"Once upon a time,",
 	max_tokens=512,
 	echo=True
 )
-print(output)`;
+print(output)`);
 	}
 
-	return [snippet];
+	return snippets;
 };

From 524e965051e8e216a8783b74875b558d7d2c4756 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 7 Oct 2024 16:16:29 +0200
Subject: [PATCH 07/17] real examples

---
 packages/tasks/src/local-apps.spec.ts               | 4 ++--
 packages/tasks/src/model-libraries-snippets.spec.ts | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
index af71e317c0..b0e4590299 100644
--- a/packages/tasks/src/local-apps.spec.ts
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -6,7 +6,7 @@ describe("local-apps", () => {
 	it("llama.cpp conversational", async () => {
 		const { snippet: snippetFunc } = LOCAL_APPS["llama.cpp"];
 		const model: ModelData = {
-			id: "mlabonne/gemma-2b-it-GGUF",
+			id: "bartowski/Llama-3.2-3B-Instruct-GGUF",
 			tags: ["conversational"],
 			inference: "",
 		};
@@ -14,7 +14,7 @@ describe("local-apps", () => {
 
 		expect(snippet[0].content).toEqual(`# Load and run the model:
 llama-cli \\
-	--hf-repo "mlabonne/gemma-2b-it-GGUF" \\
+	--hf-repo "bartowski/Llama-3.2-3B-Instruct-GGUF" \\
 	--hf-file {{GGUF_FILE}} \\
 	-p "You are a helpful assistant" \\
 	--conversation`);
diff --git a/packages/tasks/src/model-libraries-snippets.spec.ts b/packages/tasks/src/model-libraries-snippets.spec.ts
index d1bce3fe16..b0f2f8949e 100644
--- a/packages/tasks/src/model-libraries-snippets.spec.ts
+++ b/packages/tasks/src/model-libraries-snippets.spec.ts
@@ -5,7 +5,7 @@ import { llama_cpp_python } from "./model-libraries-snippets";
 describe("model-libraries-snippets", () => {
 	it("llama_cpp_python conversational", async () => {
 		const model: ModelData = {
-			id: "mlabonne/gemma-2b-it-GGUF",
+			id: "bartowski/Llama-3.2-3B-Instruct-GGUF",
 			tags: ["conversational"],
 			inference: "",
 		};
@@ -14,7 +14,7 @@ describe("model-libraries-snippets", () => {
 		expect(snippet.join("\n")).toEqual(`from llama_cpp import Llama
 
 llm = Llama.from_pretrained(
-	repo_id="mlabonne/gemma-2b-it-GGUF",
+	repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
 	filename="{{GGUF_FILE}}",
 )
 
@@ -30,7 +30,7 @@ llm.create_chat_completion(
 
 	it("llama_cpp_python non-conversational", async () => {
 		const model: ModelData = {
-			id: "mlabonne/gemma-2b-it-GGUF",
+			id: "mlabonne/gemma-2b-GGUF",
 			tags: [""],
 			inference: "",
 		};
@@ -39,7 +39,7 @@ llm.create_chat_completion(
 		expect(snippet.join("\n")).toEqual(`from llama_cpp import Llama
 
 llm = Llama.from_pretrained(
-	repo_id="mlabonne/gemma-2b-it-GGUF",
+	repo_id="mlabonne/gemma-2b-GGUF",
 	filename="{{GGUF_FILE}}",
 )
 

From de46212188857c6850863897d4f604f2741a2b6a Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 7 Oct 2024 16:17:58 +0200
Subject: [PATCH 08/17] "once upon a time," example

---
 packages/tasks/src/local-apps.spec.ts | 2 +-
 packages/tasks/src/local-apps.ts      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
index b0e4590299..9c8c82a2aa 100644
--- a/packages/tasks/src/local-apps.spec.ts
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -33,7 +33,7 @@ llama-cli \\
 llama-cli \\
 	--hf-repo "mlabonne/gemma-2b-GGUF" \\
 	--hf-file {{GGUF_FILE}} \\
-	-p "Once upon a time"`);
+	-p "Once upon a time,"`);
 	});
 
 	it("vLLM conversational", async () => {
diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index 8a6a9528eb..d00cf1ae7f 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -98,7 +98,7 @@ const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[]
 			`${binary} \\`,
 			`	--hf-repo "${model.id}" \\`,
 			`	--hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`,
-			`	-p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time"}"`,
+			`	-p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time,"}"`,
 		];
 		if (model.tags.includes("conversational")) {
 			snippet[snippet.length - 1] += " \\";

From 6761b2062f3b582ff120d5e42623ac2b8e155c97 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 11:48:34 +0100
Subject: [PATCH 09/17] fix rebase

---
 packages/tasks/package.json | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/packages/tasks/package.json b/packages/tasks/package.json
index 3ada081d9e..e9e64c0bf8 100644
--- a/packages/tasks/package.json
+++ b/packages/tasks/package.json
@@ -33,10 +33,7 @@
 		"watch": "npm-run-all --parallel watch:esm watch:cjs",
 		"prepare": "pnpm run build",
 		"check": "tsc",
-		"test": "vitest run",
-		"inference-codegen": "tsx scripts/inference-codegen.ts && prettier --write src/tasks/*/inference.ts",
-		"inference-tgi-import": "tsx scripts/inference-tgi-import.ts && prettier --write src/tasks/text-generation/spec/*.json && prettier --write src/tasks/chat-completion/spec/*.json",
-		"inference-tei-import": "tsx scripts/inference-tei-import.ts && prettier --write src/tasks/feature-extraction/spec/*.json"
+		"test": "vitest run"
 	},
 	"type": "module",
 	"files": [

From 04e8f0cb2f1c8bfb72357d71d71788e844243e87 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 11:53:21 +0100
Subject: [PATCH 10/17] fix imports

---
 packages/tasks/src/local-apps.spec.ts               | 4 ++--
 packages/tasks/src/model-libraries-snippets.spec.ts | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
index 9c8c82a2aa..b6ebca333c 100644
--- a/packages/tasks/src/local-apps.spec.ts
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from "vitest";
-import { LOCAL_APPS } from "./local-apps";
-import type { ModelData } from "./model-data";
+import { LOCAL_APPS } from "./local-apps.js";
+import type { ModelData } from "./model-data.js";
 
 describe("local-apps", () => {
 	it("llama.cpp conversational", async () => {
diff --git a/packages/tasks/src/model-libraries-snippets.spec.ts b/packages/tasks/src/model-libraries-snippets.spec.ts
index b0f2f8949e..7ca049f3ff 100644
--- a/packages/tasks/src/model-libraries-snippets.spec.ts
+++ b/packages/tasks/src/model-libraries-snippets.spec.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from "vitest";
-import type { ModelData } from "./model-data";
-import { llama_cpp_python } from "./model-libraries-snippets";
+import type { ModelData } from "./model-data.js";
+import { llama_cpp_python } from "./model-libraries-snippets.js";
 
 describe("model-libraries-snippets", () => {
 	it("llama_cpp_python conversational", async () => {

From d4e7fcd4436a969f2acb8984bfdbe3e35e4ab9ce Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 12:16:52 +0100
Subject: [PATCH 11/17] simplify strings

---
 packages/tasks/src/local-apps.ts | 45 +++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index d00cf1ae7f..f54864720f 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -183,34 +183,31 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[]
 };
 
 const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
-	const runCommandInstruct = [
-		"# Call the server using curl:",
-		`curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
-		`	-H "Content-Type: application/json" \\`,
-		`	--data '{`,
-		`		"model": "${model.id}",`,
-		`		"messages": [`,
-		`			{"role": "user", "content": "Hello!"}`,
-		`		]`,
-		`	}'`,
-	];
-	const runCommandNonInstruct = [
-		"# Call the server using curl:",
-		`curl -X POST "http://localhost:8000/v1/completions" \\`,
-		`	-H "Content-Type: application/json" \\`,
-		`	--data '{`,
-		`		"model": "${model.id}",`,
-		`		"prompt": "Once upon a time,",`,
-		`		"max_tokens": 512,`,
-		`		"temperature": 0.5`,
-		`	}'`,
-	];
+	// todo: lets get the messages here dawg
+	const runCommandInstruct = `# Call the server using curl:
+curl -X POST "http://localhost:8000/v1/chat/completions" \\
+	-H "Content-Type: application/json" \\
+	--data '{
+		"model": "${model.id}",
+		"messages": [
+			{"role": "user", "content": "Hello!"}
+		]
+	}'`;
+	const runCommandNonInstruct = `# Call the server using curl:
+curl -X POST "http://localhost:8000/v1/completions" \\
+	-H "Content-Type: application/json" \\
+	--data '{
+		"model": "${model.id}",
+		"prompt": "Once upon a time,",
+		"max_tokens": 512,
+		"temperature": 0.5
+	}'`;
 	const runCommand = model.tags.includes("conversational") ? runCommandInstruct : runCommandNonInstruct;
 	return [
 		{
 			title: "Install from pip",
 			setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
-			content: [`# Load and run the model:\nvllm serve "${model.id}"`, runCommand.join("\n")],
+			content: [`# Load and run the model:\nvllm serve "${model.id}"`, runCommand],
 		},
 		{
 			title: "Use Docker images",
@@ -227,7 +224,7 @@ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
 			].join("\n"),
 			content: [
 				`# Load and run the model:\ndocker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`,
-				runCommand.join("\n"),
+				runCommand,
 			],
 		},
 	];

From 0485bd1f950df7d31f69d199db9d87f053c4ac02 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 12:16:57 +0100
Subject: [PATCH 12/17] format

---
 packages/tasks/src/index.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/packages/tasks/src/index.ts b/packages/tasks/src/index.ts
index f11661a134..c350fecd2e 100644
--- a/packages/tasks/src/index.ts
+++ b/packages/tasks/src/index.ts
@@ -51,7 +51,6 @@ export * from "./gguf.js";
 export { snippets };
 export type { InferenceSnippet } from "./snippets/index.js";
 
-
 export { SKUS, DEFAULT_MEMORY_OPTIONS } from "./hardware.js";
 export type { HardwareSpec, SkuType } from "./hardware.js";
 export { LOCAL_APPS } from "./local-apps.js";

From 2e7c0808ca75a10222d4fc9bcdda1fff7660aa19 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 12:25:39 +0100
Subject: [PATCH 13/17] use shared example message

---
 packages/tasks/src/local-apps.spec.ts |  6 +++++-
 packages/tasks/src/local-apps.ts      | 13 +++++++++----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
index b6ebca333c..a1e1752712 100644
--- a/packages/tasks/src/local-apps.spec.ts
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -40,6 +40,7 @@ llama-cli \\
 		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
 		const model: ModelData = {
 			id: "meta-llama/Llama-3.2-3B-Instruct",
+			pipeline_tag: "text-generation",
 			tags: ["conversational"],
 			inference: "",
 		};
@@ -53,7 +54,10 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
 	--data '{
 		"model": "meta-llama/Llama-3.2-3B-Instruct",
 		"messages": [
-			{"role": "user", "content": "Hello!"}
+			{
+				"role": "user",
+				"content": "What is the capital of France?"
+			}
 		]
 	}'`);
 	});
diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index f54864720f..a1d2c119e1 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -1,6 +1,9 @@
 import { parseGGUFQuantLabel } from "./gguf.js";
 import type { ModelData } from "./model-data.js";
 import type { PipelineType } from "./pipelines.js";
+import { stringifyMessages } from "./snippets/common.js";
+import { getModelInputSnippet } from "./snippets/inputs.js";
+import { ChatCompletionInputMessage } from "./tasks/index.js";
 
 export interface LocalAppSnippet {
 	/**
@@ -186,15 +189,17 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[]
 };
 
 const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
-	// todo: lets get the messages here dawg
+	const messages = getModelInputSnippet(model) as ChatCompletionInputMessage[];
 	const runCommandInstruct = `# Call the server using curl:
 curl -X POST "http://localhost:8000/v1/chat/completions" \\
 	-H "Content-Type: application/json" \\
 	--data '{
 		"model": "${model.id}",
-		"messages": [
-			{"role": "user", "content": "Hello!"}
-		]
+		"messages": ${stringifyMessages(messages, {
+			indent: "\t\t",
+			attributeKeyQuotes: true,
+			customContentEscaper: (str) => str.replace(/'/g, "'\\''"),
+		})}
 	}'`;
 	const runCommandNonInstruct = `# Call the server using curl:
 curl -X POST "http://localhost:8000/v1/completions" \\

From 31632a19dd00126135861951be140ff6eabf8bb2 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 14:00:45 +0100
Subject: [PATCH 14/17] vLLM VLM snippet support

---
 packages/tasks/src/local-apps.spec.ts | 37 +++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
index a1e1752712..8fc56cbe59 100644
--- a/packages/tasks/src/local-apps.spec.ts
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -83,4 +83,41 @@ curl -X POST "http://localhost:8000/v1/completions" \\
 		"temperature": 0.5
 	}'`);
 	});
+
+	it("vLLM VLM conversational", async () => {
+		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
+		const model: ModelData = {
+			id: "meta-llama/Llama-3.2-11B-Vision-Instruct",
+			pipeline_tag: "image-text-to-text",
+			tags: ["conversational"],
+			inference: "",
+		};
+		const snippet = snippetFunc(model);
+
+		expect((snippet[0].content as string[]).join("\n")).toEqual(`# Load and run the model:
+vllm serve "meta-llama/Llama-3.2-11B-Vision-Instruct"
+# Call the server using curl:
+curl -X POST "http://localhost:8000/v1/chat/completions" \\
+	-H "Content-Type: application/json" \\
+	--data '{
+		"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
+		"messages": [
+			{
+				"role": "user",
+				"content": [
+					{
+						"type": "text",
+						"text": "Describe this image in one sentence."
+					},
+					{
+						"type": "image_url",
+						"image_url": {
+							"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
+						}
+					}
+				]
+			}
+		]
+	}'`);
+	});
 });

From d6c5b5b597732253a58c58c2912479fc21d91b6c Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Mon, 18 Nov 2024 14:06:06 +0100
Subject: [PATCH 15/17] match naming

---
 packages/tasks/src/local-apps.spec.ts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/packages/tasks/src/local-apps.spec.ts b/packages/tasks/src/local-apps.spec.ts
index 8fc56cbe59..23806f668d 100644
--- a/packages/tasks/src/local-apps.spec.ts
+++ b/packages/tasks/src/local-apps.spec.ts
@@ -36,7 +36,7 @@ llama-cli \\
 	-p "Once upon a time,"`);
 	});
 
-	it("vLLM conversational", async () => {
+	it("vLLM conversational llm", async () => {
 		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
 		const model: ModelData = {
 			id: "meta-llama/Llama-3.2-3B-Instruct",
@@ -62,7 +62,7 @@ curl -X POST "http://localhost:8000/v1/chat/completions" \\
 	}'`);
 	});
 
-	it("vLLM non-conversational", async () => {
+	it("vLLM non-conversational llm", async () => {
 		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
 		const model: ModelData = {
 			id: "meta-llama/Llama-3.2-3B",
@@ -84,7 +84,7 @@ curl -X POST "http://localhost:8000/v1/completions" \\
 	}'`);
 	});
 
-	it("vLLM VLM conversational", async () => {
+	it("vLLM conversational vlm", async () => {
 		const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
 		const model: ModelData = {
 			id: "meta-llama/Llama-3.2-11B-Vision-Instruct",

From fa745ebece6d3f5c1cf5dbc518cfabb3dfbc9365 Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Wed, 20 Nov 2024 10:39:58 +0100
Subject: [PATCH 16/17] llama_cpp_python use same snippet

---
 packages/tasks/src/model-libraries-snippets.spec.ts |  1 +
 packages/tasks/src/model-libraries-snippets.ts      | 11 +++++------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/packages/tasks/src/model-libraries-snippets.spec.ts b/packages/tasks/src/model-libraries-snippets.spec.ts
index 7ca049f3ff..fa87d82423 100644
--- a/packages/tasks/src/model-libraries-snippets.spec.ts
+++ b/packages/tasks/src/model-libraries-snippets.spec.ts
@@ -6,6 +6,7 @@ describe("model-libraries-snippets", () => {
 	it("llama_cpp_python conversational", async () => {
 		const model: ModelData = {
 			id: "bartowski/Llama-3.2-3B-Instruct-GGUF",
+			pipeline_tag: "text-generation",
 			tags: ["conversational"],
 			inference: "",
 		};
diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts
index 9b51878755..a9ef6e4657 100644
--- a/packages/tasks/src/model-libraries-snippets.ts
+++ b/packages/tasks/src/model-libraries-snippets.ts
@@ -1,6 +1,9 @@
 import type { ModelData } from "./model-data.js";
 import type { WidgetExampleTextInput, WidgetExampleSentenceSimilarityInput } from "./widget-example.js";
 import { LIBRARY_TASK_MAPPING } from "./library-to-tasks.js";
+import { getModelInputSnippet } from "./snippets/inputs.js";
+import { ChatCompletionInputMessage } from "./tasks/index.js";
+import { stringifyMessages } from "./snippets/common.js";
 
 const TAG_CUSTOM_CODE = "custom_code";
 
@@ -430,13 +433,9 @@ llm = Llama.from_pretrained(
 	];
 
 	if (model.tags.includes("conversational")) {
+		const messages = getModelInputSnippet(model) as ChatCompletionInputMessage[];
 		snippets.push(`llm.create_chat_completion(
-	messages = [
-		{
-			"role": "user",
-			"content": "What is the capital of France?"
-		}
-	]
+	messages = ${stringifyMessages(messages, { attributeKeyQuotes: true, indent: "\t" })}
 )`);
 	} else {
 		snippets.push(`output = llm(

From 0e7cf591a48e501528555301c203e6cd6e5573dc Mon Sep 17 00:00:00 2001
From: Mishig Davaadorj
Date: Wed, 20 Nov 2024 10:42:00 +0100
Subject: [PATCH 17/17] lint

---
 packages/tasks/src/local-apps.ts               | 2 +-
 packages/tasks/src/model-libraries-snippets.ts | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/tasks/src/local-apps.ts b/packages/tasks/src/local-apps.ts
index a1d2c119e1..2249183a4c 100644
--- a/packages/tasks/src/local-apps.ts
+++ b/packages/tasks/src/local-apps.ts
@@ -3,7 +3,7 @@ import type { ModelData } from "./model-data.js";
 import type { PipelineType } from "./pipelines.js";
 import { stringifyMessages } from "./snippets/common.js";
 import { getModelInputSnippet } from "./snippets/inputs.js";
-import { ChatCompletionInputMessage } from "./tasks/index.js";
+import type { ChatCompletionInputMessage } from "./tasks/index.js";
 
 export interface LocalAppSnippet {
 	/**
diff --git a/packages/tasks/src/model-libraries-snippets.ts b/packages/tasks/src/model-libraries-snippets.ts
index a9ef6e4657..bb6ac12c3c 100644
--- a/packages/tasks/src/model-libraries-snippets.ts
+++ b/packages/tasks/src/model-libraries-snippets.ts
@@ -2,7 +2,7 @@ import type { ModelData } from "./model-data.js";
 import type { WidgetExampleTextInput, WidgetExampleSentenceSimilarityInput } from "./widget-example.js";
 import { LIBRARY_TASK_MAPPING } from "./library-to-tasks.js";
 import { getModelInputSnippet } from "./snippets/inputs.js";
-import { ChatCompletionInputMessage } from "./tasks/index.js";
+import type { ChatCompletionInputMessage } from "./tasks/index.js";
 import { stringifyMessages } from "./snippets/common.js";
 
 const TAG_CUSTOM_CODE = "custom_code";