123 changes: 123 additions & 0 deletions packages/tasks/src/local-apps.spec.ts
@@ -0,0 +1,123 @@
import { describe, expect, it } from "vitest";
import { LOCAL_APPS } from "./local-apps.js";
import type { ModelData } from "./model-data.js";

describe("local-apps", () => {
it("llama.cpp conversational", async () => {
const { snippet: snippetFunc } = LOCAL_APPS["llama.cpp"];
const model: ModelData = {
id: "bartowski/Llama-3.2-3B-Instruct-GGUF",
tags: ["conversational"],
inference: "",
};
const snippet = snippetFunc(model);

expect(snippet[0].content).toEqual(`# Load and run the model:
llama-cli \\
--hf-repo "bartowski/Llama-3.2-3B-Instruct-GGUF" \\
--hf-file {{GGUF_FILE}} \\
-p "You are a helpful assistant" \\
--conversation`);
});

it("llama.cpp non-conversational", async () => {
const { snippet: snippetFunc } = LOCAL_APPS["llama.cpp"];
const model: ModelData = {
id: "mlabonne/gemma-2b-GGUF",
tags: [],
inference: "",
};
const snippet = snippetFunc(model);

expect(snippet[0].content).toEqual(`# Load and run the model:
llama-cli \\
--hf-repo "mlabonne/gemma-2b-GGUF" \\
--hf-file {{GGUF_FILE}} \\
-p "Once upon a time,"`);
});

it("vLLM conversational llm", async () => {
const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
const model: ModelData = {
id: "meta-llama/Llama-3.2-3B-Instruct",
pipeline_tag: "text-generation",
tags: ["conversational"],
inference: "",
};
const snippet = snippetFunc(model);

expect((snippet[0].content as string[]).join("\n")).toEqual(`# Load and run the model:
vllm serve "meta-llama/Llama-3.2-3B-Instruct"
# Call the server using curl:
curl -X POST "http://localhost:8000/v1/chat/completions" \\
-H "Content-Type: application/json" \\
--data '{
"model": "meta-llama/Llama-3.2-3B-Instruct",
"messages": [
{
"role": "user",
"content": "What is the capital of France?"
}
]
}'`);
});

it("vLLM non-conversational llm", async () => {
const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
const model: ModelData = {
id: "meta-llama/Llama-3.2-3B",
tags: [""],
inference: "",
};
const snippet = snippetFunc(model);

expect((snippet[0].content as string[]).join("\n")).toEqual(`# Load and run the model:
vllm serve "meta-llama/Llama-3.2-3B"
# Call the server using curl:
curl -X POST "http://localhost:8000/v1/completions" \\
-H "Content-Type: application/json" \\
--data '{
"model": "meta-llama/Llama-3.2-3B",
"prompt": "Once upon a time,",
"max_tokens": 512,
"temperature": 0.5
}'`);
});

it("vLLM conversational vlm", async () => {
const { snippet: snippetFunc } = LOCAL_APPS["vllm"];
const model: ModelData = {
id: "meta-llama/Llama-3.2-11B-Vision-Instruct",
pipeline_tag: "image-text-to-text",
tags: ["conversational"],
inference: "",
};
const snippet = snippetFunc(model);

expect((snippet[0].content as string[]).join("\n")).toEqual(`# Load and run the model:
vllm serve "meta-llama/Llama-3.2-11B-Vision-Instruct"
# Call the server using curl:
curl -X POST "http://localhost:8000/v1/chat/completions" \\
-H "Content-Type: application/json" \\
--data '{
"model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "Describe this image in one sentence."
},
{
"type": "image_url",
"image_url": {
"url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
}
}
]
}
]
}'`);
});
});
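The two llama.cpp cases above only cover the default {{GGUF_FILE}} placeholder. Below is a hypothetical extra case — not part of this PR — that would pin down the optional filepath argument, assuming LOCAL_APPS["llama.cpp"].snippet forwards its second argument to snippetLlamacpp (see the filepath ?? "{{GGUF_FILE}}" fallback in local-apps.ts further down); the concrete GGUF filename is an illustrative assumption.

import { describe, expect, it } from "vitest";
import { LOCAL_APPS } from "./local-apps.js";
import type { ModelData } from "./model-data.js";

describe("local-apps (hypothetical extra case)", () => {
	it("llama.cpp with an explicit GGUF file", async () => {
		const { snippet: snippetFunc } = LOCAL_APPS["llama.cpp"];
		const model: ModelData = {
			id: "bartowski/Llama-3.2-3B-Instruct-GGUF",
			tags: ["conversational"],
			inference: "",
		};
		// Pass a concrete filename instead of relying on the placeholder.
		const snippet = snippetFunc(model, "Llama-3.2-3B-Instruct-Q4_K_M.gguf");

		expect(snippet[0].content).toContain("--hf-file Llama-3.2-3B-Instruct-Q4_K_M.gguf");
	});
});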
55 changes: 37 additions & 18 deletions packages/tasks/src/local-apps.ts
@@ -1,6 +1,9 @@
import { parseGGUFQuantLabel } from "./gguf.js";
import type { ModelData } from "./model-data.js";
import type { PipelineType } from "./pipelines.js";
import { stringifyMessages } from "./snippets/common.js";
import { getModelInputSnippet } from "./snippets/inputs.js";
import type { ChatCompletionInputMessage } from "./tasks/index.js";

export interface LocalAppSnippet {
/**
@@ -92,15 +95,20 @@ function isMlxModel(model: ModelData) {
}

const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
const command = (binary: string) =>
[
const command = (binary: string) => {
const snippet = [
"# Load and run the model:",
`${binary} \\`,
` --hf-repo "${model.id}" \\`,
` --hf-file ${filepath ?? "{{GGUF_FILE}}"} \\`,
' -p "You are a helpful assistant" \\',
" --conversation",
].join("\n");
` -p "${model.tags.includes("conversational") ? "You are a helpful assistant" : "Once upon a time,"}"`,
];
if (model.tags.includes("conversational")) {
snippet[snippet.length - 1] += " \\";
snippet.push(" --conversation");
}
return snippet.join("\n");
};
return [
{
title: "Install from brew",
@@ -178,22 +186,33 @@ const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[]
};

const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
const runCommand = [
"# Call the server using curl:",
`curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
` -H "Content-Type: application/json" \\`,
` --data '{`,
` "model": "${model.id}",`,
` "messages": [`,
` {"role": "user", "content": "Hello!"}`,
` ]`,
` }'`,
];
const messages = getModelInputSnippet(model) as ChatCompletionInputMessage[];
Member: ah nice!

const runCommandInstruct = `# Call the server using curl:
curl -X POST "http://localhost:8000/v1/chat/completions" \\
-H "Content-Type: application/json" \\
--data '{
"model": "${model.id}",
"messages": ${stringifyMessages(messages, {
indent: "\t\t",
attributeKeyQuotes: true,
customContentEscaper: (str) => str.replace(/'/g, "'\\''"),
})}
}'`;
const runCommandNonInstruct = `# Call the server using curl:
curl -X POST "http://localhost:8000/v1/completions" \\
-H "Content-Type: application/json" \\
--data '{
"model": "${model.id}",
"prompt": "Once upon a time,",
"max_tokens": 512,
"temperature": 0.5
}'`;
const runCommand = model.tags.includes("conversational") ? runCommandInstruct : runCommandNonInstruct;
return [
{
title: "Install from pip",
setup: ["# Install vLLM from pip:", "pip install vllm"].join("\n"),
content: [`# Load and run the model:\nvllm serve "${model.id}"`, runCommand.join("\n")],
content: [`# Load and run the model:\nvllm serve "${model.id}"`, runCommand],
},
{
title: "Use Docker images",
@@ -210,7 +229,7 @@ const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
].join("\n"),
content: [
`# Load and run the model:\ndocker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`,
runCommand.join("\n"),
runCommand,
],
},
];
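The messages block in the curl snippet above is produced by stringifyMessages from ./snippets/common.js, which this diff only imports. As orientation, here is a minimal sketch — an assumption about its behavior, not the package's actual implementation — of what the call site appears to rely on: quoted attribute keys, per-line indentation, and a caller-supplied escaper so the JSON can sit inside a single-quoted shell --data argument.

import type { ChatCompletionInputMessage } from "./tasks/index.js";

interface StringifyMessagesOptions {
	indent?: string;
	attributeKeyQuotes?: boolean;
	customContentEscaper?: (str: string) => string;
}

// Sketch only: serialize chat messages into a JSON-like block suitable for
// embedding in a generated snippet. Defaults and exact spacing are assumptions.
function stringifyMessagesSketch(
	messages: ChatCompletionInputMessage[],
	opts: StringifyMessagesOptions = {}
): string {
	const { indent = "\t", attributeKeyQuotes = false, customContentEscaper } = opts;
	let json = JSON.stringify(messages, null, "\t");
	if (!attributeKeyQuotes) {
		// Drop quotes around object keys, e.g. "role": -> role:
		json = json.replace(/"([A-Za-z_]+)":/g, "$1:");
	}
	if (customContentEscaper) {
		// e.g. the call above escapes single quotes so the payload survives
		// inside a single-quoted --data '...' argument.
		json = customContentEscaper(json);
	}
	// Indent every line after the first so the block lines up with the
	// surrounding template string.
	return json
		.split("\n")
		.map((line, i) => (i === 0 ? line : indent + line))
		.join("\n");
}

The customContentEscaper passed above, (str) => str.replace(/'/g, "'\\''"), is the standard POSIX trick for embedding a single quote inside a single-quoted shell string, which keeps the generated curl command copy-pasteable.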
54 changes: 54 additions & 0 deletions packages/tasks/src/model-libraries-snippets.spec.ts
@@ -0,0 +1,54 @@
import { describe, expect, it } from "vitest";
import type { ModelData } from "./model-data.js";
import { llama_cpp_python } from "./model-libraries-snippets.js";

describe("model-libraries-snippets", () => {
it("llama_cpp_python conversational", async () => {
const model: ModelData = {
id: "bartowski/Llama-3.2-3B-Instruct-GGUF",
pipeline_tag: "text-generation",
tags: ["conversational"],
inference: "",
};
const snippet = llama_cpp_python(model);

expect(snippet.join("\n")).toEqual(`from llama_cpp import Llama

llm = Llama.from_pretrained(
repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
filename="{{GGUF_FILE}}",
)

llm.create_chat_completion(
messages = [
{
"role": "user",
"content": "What is the capital of France?"
}
]
)`);
});

it("llama_cpp_python non-conversational", async () => {
const model: ModelData = {
id: "mlabonne/gemma-2b-GGUF",
tags: [""],
inference: "",
};
const snippet = llama_cpp_python(model);

expect(snippet.join("\n")).toEqual(`from llama_cpp import Llama

llm = Llama.from_pretrained(
repo_id="mlabonne/gemma-2b-GGUF",
filename="{{GGUF_FILE}}",
)

output = llm(
"Once upon a time,",
max_tokens=512,
echo=True
)
print(output)`);
});
});
35 changes: 24 additions & 11 deletions packages/tasks/src/model-libraries-snippets.ts
@@ -1,6 +1,9 @@
import type { ModelData } from "./model-data.js";
import type { WidgetExampleTextInput, WidgetExampleSentenceSimilarityInput } from "./widget-example.js";
import { LIBRARY_TASK_MAPPING } from "./library-to-tasks.js";
import { getModelInputSnippet } from "./snippets/inputs.js";
import type { ChatCompletionInputMessage } from "./tasks/index.js";
import { stringifyMessages } from "./snippets/common.js";

const TAG_CUSTOM_CODE = "custom_code";

@@ -418,23 +421,33 @@ model = keras_hub.models.CausalLM.from_preset("hf://${model.id}", dtype="bfloat1
`,
];

export const llama_cpp_python = (model: ModelData): string[] => [
`from llama_cpp import Llama
export const llama_cpp_python = (model: ModelData): string[] => {
const snippets = [
`from llama_cpp import Llama

llm = Llama.from_pretrained(
repo_id="${model.id}",
filename="{{GGUF_FILE}}",
)
`,
];

llm.create_chat_completion(
messages = [
{
"role": "user",
"content": "What is the capital of France?"
}
]
)`,
];
if (model.tags.includes("conversational")) {
const messages = getModelInputSnippet(model) as ChatCompletionInputMessage[];
snippets.push(`llm.create_chat_completion(
messages = ${stringifyMessages(messages, { attributeKeyQuotes: true, indent: "\t" })}
)`);
} else {
snippets.push(`output = llm(
"Once upon a time,",
max_tokens=512,
echo=True
)
print(output)`);
}

return snippets;
};

export const tf_keras = (model: ModelData): string[] => [
`# Note: 'keras<3.x' or 'tf_keras' must be installed (legacy)
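Finally, for orientation on how the updated llama_cpp_python helper gets consumed: a caller only needs to join the returned string[] and, once a concrete GGUF file is known, substitute the {{GGUF_FILE}} placeholder. A minimal sketch — the renderer name, model metadata, and filename below are illustrative assumptions, not part of this PR:

import type { ModelData } from "./model-data.js";
import { llama_cpp_python } from "./model-libraries-snippets.js";

// Hypothetical renderer: join the snippet blocks and optionally fill in the
// GGUF placeholder with a concrete filename.
function renderLlamaCppPythonSnippet(model: ModelData, ggufFile?: string): string {
	const joined = llama_cpp_python(model).join("\n");
	return ggufFile ? joined.replaceAll("{{GGUF_FILE}}", ggufFile) : joined;
}

// Prints the conversational variant, because the tags include "conversational".
const example: ModelData = {
	id: "bartowski/Llama-3.2-3B-Instruct-GGUF",
	pipeline_tag: "text-generation",
	tags: ["conversational"],
	inference: "",
};
console.log(renderLlamaCppPythonSnippet(example, "Llama-3.2-3B-Instruct-Q4_K_M.gguf"));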